Analysis date: 2023-09-20

Depends on

CRC_Xenografts_Batch2_DataProcessing Script Xenografts_Batch1_2_DataProcessing Script

Setup

Load libraries and functions

Load data and process

Batch 1

Load

load("../Data/Cache/Xenografts_Batch1_2_DataProcessing.RData")

Process

Combine
all_pY_peptides_form <- rbind( (pY_Set1_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ), 
(pY_Set2_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) ) %>%
  unique

pY_Batch1_form <- plyr::join_all(list(all_pY_peptides_form, pY_Set1_form,
                    pY_Set2_form), 
               by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ), 
               type='left') %>% as_tibble() 

pY_Batch1_form
## # A tibble: 650 × 40
##    Annotated_Sequence  HGNC_Symbol Master.Protein.Acces…¹ Master.Protein.Descr…²
##    <chr>               <chr>       <chr>                  <chr>                 
##  1 ADGTNTGFPRyPNDSVYA… RET         P07949                 Proto-oncogene tyrosi…
##  2 ADHQPLTEASyVNLPTIA… RPSA        P08865                 40S ribosomal protein…
##  3 ADyDTLSLR           PKP3        Q9Y446                 Plakophilin-3 OS=Homo…
##  4 AEDGSVIDyELIDQDAR   ANXA2       P07355                 Annexin A2 OS=Homo sa…
##  5 AEERPTFDYLQSVLDDFy… LYN         P07948                 Tyrosine-protein kina…
##  6 AEGSDVANAVLDGADCIM… PKM         P14618                 Pyruvate kinase PKM O…
##  7 AESGPDLRyEVTSGGGGT… DSP         P15924                 Desmoplakin OS=Homo s…
##  8 AGSLPNyATINGK       TNS1        Q9HBL0                 Tensin-1 OS=Homo sapi…
##  9 ALELDSNLyR          MYH9        P35579                 Myosin-9 OS=Homo sapi…
## 10 ALNGAEPNyHSLPSAR    LAMTOR1     Q6IAA8                 Ragulator complex pro…
## # ℹ 640 more rows
## # ℹ abbreviated names: ¹​Master.Protein.Accessions, ²​Master.Protein.Descriptions
## # ℹ 36 more variables: log2FC_Xenograft_EC_24h_2_Set1 <dbl>,
## #   log2FC_Xenograft_EC_24h_5_Set1 <dbl>, log2FC_Xenograft_E_24h_4_Set1 <dbl>,
## #   log2FC_Xenograft_ctrl_5d_7_Set1 <dbl>,
## #   log2FC_Xenograft_ctrl_5d_1_Set1 <dbl>,
## #   log2FC_Xenograft_EC_24h_1_Set1 <dbl>, …
pY_mat_Batch1 <- pY_Batch1_form %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t
pY_mat_Batch1[1:5,1:5]
##                                 RET_ADGTNTGFPRyPNDSVYANWMLSPSAAK
## log2FC_Xenograft_EC_24h_2_Set1                       -0.07401477
## log2FC_Xenograft_EC_24h_5_Set1                        0.21847224
## log2FC_Xenograft_E_24h_4_Set1                         0.22874181
## log2FC_Xenograft_ctrl_5d_7_Set1                       0.54095417
## log2FC_Xenograft_ctrl_5d_1_Set1                      -0.27106564
##                                 RPSA_ADHQPLTEASyVNLPTIALCNTDSPLR PKP3_ADyDTLSLR
## log2FC_Xenograft_EC_24h_2_Set1                        -0.5411173     0.64984600
## log2FC_Xenograft_EC_24h_5_Set1                         0.3552827    -0.84664124
## log2FC_Xenograft_E_24h_4_Set1                         -0.3858904     0.89538395
## log2FC_Xenograft_ctrl_5d_7_Set1                       -0.1045738     0.01867478
## log2FC_Xenograft_ctrl_5d_1_Set1                        1.0251007    -0.38241878
##                                 ANXA2_AEDGSVIDyELIDQDAR
## log2FC_Xenograft_EC_24h_2_Set1               0.16930587
## log2FC_Xenograft_EC_24h_5_Set1              -0.62007389
## log2FC_Xenograft_E_24h_4_Set1               -0.09273979
## log2FC_Xenograft_ctrl_5d_7_Set1              0.31250121
## log2FC_Xenograft_ctrl_5d_1_Set1              0.30194393
##                                 LYN_AEERPTFDYLQSVLDDFyTATEGQYQQQP
## log2FC_Xenograft_EC_24h_2_Set1                          0.4838867
## log2FC_Xenograft_EC_24h_5_Set1                          0.2236433
## log2FC_Xenograft_E_24h_4_Set1                          -0.5954406
## log2FC_Xenograft_ctrl_5d_7_Set1                         0.3808837
## log2FC_Xenograft_ctrl_5d_1_Set1                         0.1752776
By condition
All
pY_Batch1_by_cond <- pY_Batch1_form %>%
  pivot_longer(cols = contains("Xenograft") , names_to = "sample", values_to = "log2FC_over_ctrl") %>%
  mutate(sample = str_remove( sample, "log2FC_Xenograft") ) %>%
  separate( sample , into = c("xenograft", "treatment", "timepoint", "replicate", "set"), remove = F) %>%
  group_by(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, xenograft, treatment,
           timepoint) %>%
  summarise(mean_log2FC_per_xeno_treat_time = mean(log2FC_over_ctrl, na.rm = T)) %>%
  ungroup() %>%
  mutate(sample_group = paste0("Xenograft4", xenograft, "_" , treatment, "_" , timepoint )) %>%
  select(-xenograft, -treatment, -timepoint) %>%
  pivot_wider( values_from =  mean_log2FC_per_xeno_treat_time, names_from = sample_group)
## `summarise()` has grouped output by 'Annotated_Sequence', 'HGNC_Symbol',
## 'Master.Protein.Accessions', 'Master.Protein.Descriptions', 'xenograft',
## 'treatment'. You can override using the `.groups` argument.
sum((is.na(pY_Batch1_by_cond) %>% rowSums() ) ==0 ) 
## [1] 231
pY_Batch1_by_cond_noNA <- pY_Batch1_by_cond %>%  .[( (is.na(pY_Batch1_by_cond ) %>% rowSums() ) ==0),]

pY_mat_Batch1_by_cond_noNA <- pY_Batch1_by_cond_noNA %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t
Day 5
sum((is.na(pY_Batch1_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 ) 
## [1] 627
pY_Batch1_by_cond_5d_noNA <- pY_Batch1_by_cond %>%  .[( (is.na(pY_Batch1_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 ), ] %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d") )

pY_mat_Batch1_by_cond_5d_noNA <- pY_Batch1_by_cond_5d_noNA %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t

rm(list=setdiff(ls(), c("pY_mat_Batch1_by_cond_noNA", "pY_Batch1_by_cond_noNA", 
                        "pY_mat_Batch1_by_cond_5d_noNA", "pY_Batch1_by_cond_5d_noNA" )  ))

Batch 2

Load

load("../Data/Cache/Xenografts_Batch2_DataProcessing.RData")

Process

Combine
pY_Set5_Xenograft1_form <- pY_Set5_normXenograft1_form %>% 
  select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, contains("log2FC_Xenograft1_"))
pY_Set5_Xenograft14_form <- pY_Set5_normXenograft14_form %>% 
  select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, contains("log2FC_Xenograft14_"))

all_pY_peptides_form <- rbind( (pY_Set1_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ), 
(pY_Set3_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ), 
(pY_Set4_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ),  
(pY_Set5_Xenograft1_form %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ), 
(pY_Set5_Xenograft14_form  %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) )%>%
  unique

pY_Batch2_form <- plyr::join_all(list(all_pY_peptides_form, pY_Set1_form,
                    pY_Set3_form,pY_Set4_form, 
                    pY_Set5_Xenograft1_form, pY_Set5_Xenograft14_form ), 
               by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ), 
               type='left') %>% as_tibble() 

#pY_Batch2[((is.na(pY_Batch2) %>% rowSums() ) ==0 ),] %>%
#  pivot_longer(cols = contains("Xenograft") , names_to = "sample", values_to = "log2FC_over_ctrl") %>%
#  mutate(sample = str_remove( sample, "log2FC_Xenograft") ) %>%
#  separate( sample , into = c("xenograft", "treatment", "timepoint", "replicate", "set"), remove = F) %>%
#  ggplot(aes(x = interaction(timepoint, treatment, xenograft),  y = log2FC_over_ctrl, fill = treatment) ) +
#  geom_boxplot() +
#  geom_point() +
#  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
#  facet_wrap(~ HGNC_Symbol + Annotated_Sequence)

pY_Batch2_form
## # A tibble: 1,380 × 72
##    Annotated_Sequence  HGNC_Symbol Master.Protein.Acces…¹ Master.Protein.Descr…²
##    <chr>               <chr>       <chr>                  <chr>                 
##  1 AAGPGWAAyER         IQGAP3      Q86VI3                 Ras GTPase-activating…
##  2 AATSDLEHyDKTR       NUCB2       P80303                 Nucleobindin-2 OS=Hom…
##  3 AATSGVPSIYAPSTyAHL… LSR         Q86X29                 Lipolysis-stimulated …
##  4 AATSGVPSIyAPSTYAHL… LSR         Q86X29                 Lipolysis-stimulated …
##  5 AAVPSGASTGIyEALELR… ENO1        P06733                 Alpha-enolase OS=Homo…
##  6 AAYFGIyDTAK         SLC25A5     P05141                 ADP/ATP translocase 2…
##  7 AAyFGIYDTAK         SLC25A5     P05141                 ADP/ATP translocase 2…
##  8 ACGSSEASAyLDELR     TRPM4       Q8TD43                 Transient receptor po…
##  9 ACPDSLGSPAPSHAyHGG… ANO1        Q5XXA6                 Anoctamin-1 OS=Homo s…
## 10 ACQSIyPLHDVFVR      RPS3A       P61247                 40S ribosomal protein…
## # ℹ 1,370 more rows
## # ℹ abbreviated names: ¹​Master.Protein.Accessions, ²​Master.Protein.Descriptions
## # ℹ 68 more variables: log2FC_Xenograft1_ctrl_5d_1_Set1 <dbl>,
## #   log2FC_Xenograft1_ctrl_5d_2_Set1 <dbl>,
## #   log2FC_Xenograft1_ctrl_5d_3_Set1 <dbl>,
## #   log2FC_Xenograft1_ctrl_5d_4_Set1 <dbl>,
## #   log2FC_Xenograft1_ctrl_5d_5_Set1 <dbl>, …
pY_mat_Batch2 <- pY_Batch2_form %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t
pY_mat_Batch2[1:5,1:5]
##                                  IQGAP3_AAGPGWAAyER NUCB2_AATSDLEHyDKTR
## log2FC_Xenograft1_ctrl_5d_1_Set1         0.22305250           0.6894926
## log2FC_Xenograft1_ctrl_5d_2_Set1        -0.44856667          -0.5706067
## log2FC_Xenograft1_ctrl_5d_3_Set1        -0.09876451          -0.5666204
## log2FC_Xenograft1_ctrl_5d_4_Set1         0.20796365          -0.3686920
## log2FC_Xenograft1_ctrl_5d_5_Set1        -0.19747354           0.2524627
##                                  LSR_AATSGVPSIYAPSTyAHLSPAK
## log2FC_Xenograft1_ctrl_5d_1_Set1                -0.00930291
## log2FC_Xenograft1_ctrl_5d_2_Set1                -0.49322341
## log2FC_Xenograft1_ctrl_5d_3_Set1                 0.23001078
## log2FC_Xenograft1_ctrl_5d_4_Set1                 0.42668463
## log2FC_Xenograft1_ctrl_5d_5_Set1                -0.29054769
##                                  LSR_AATSGVPSIyAPSTYAHLSPAK
## log2FC_Xenograft1_ctrl_5d_1_Set1                 0.20438777
## log2FC_Xenograft1_ctrl_5d_2_Set1                -0.63605823
## log2FC_Xenograft1_ctrl_5d_3_Set1                -0.07443694
## log2FC_Xenograft1_ctrl_5d_4_Set1                 0.33563739
## log2FC_Xenograft1_ctrl_5d_5_Set1                -0.07609956
##                                  ENO1_AAVPSGASTGIyEALELRDNDK
## log2FC_Xenograft1_ctrl_5d_1_Set1                  -0.8448133
## log2FC_Xenograft1_ctrl_5d_2_Set1                   1.7020024
## log2FC_Xenograft1_ctrl_5d_3_Set1                   0.7442778
## log2FC_Xenograft1_ctrl_5d_4_Set1                   0.9193946
## log2FC_Xenograft1_ctrl_5d_5_Set1                  -1.3016477
By condition
All
pY_Batch2_by_cond <- pY_Batch2_form %>%
  pivot_longer(cols = contains("Xenograft") , names_to = "sample", values_to = "log2FC_over_ctrl") %>%
  mutate(sample = str_remove( sample, "log2FC_Xenograft") ) %>%
  separate( sample , into = c("xenograft", "treatment", "timepoint", "replicate", "set"), remove = F) %>%
  group_by(Annotated_Sequence, HGNC_Symbol, Master.Protein.Accessions, Master.Protein.Descriptions, xenograft, treatment,
           timepoint) %>%
  summarise(mean_log2FC_per_xeno_treat_time = mean(log2FC_over_ctrl, na.rm = T)) %>%
  ungroup() %>%
  mutate(sample_group = paste0("Xenograft", xenograft, "_" , treatment, "_" , timepoint )) %>%
  select(-xenograft, -treatment, -timepoint) %>%
  pivot_wider( values_from =  mean_log2FC_per_xeno_treat_time, names_from = sample_group)
## `summarise()` has grouped output by 'Annotated_Sequence', 'HGNC_Symbol',
## 'Master.Protein.Accessions', 'Master.Protein.Descriptions', 'xenograft',
## 'treatment'. You can override using the `.groups` argument.
sum((is.na(pY_Batch2_by_cond %>% select(-Xenograft14_EC_24h)) %>% rowSums() ) ==0 ) 
## [1] 294
pY_Batch2_by_cond_noNA <- pY_Batch2_by_cond %>% select(-Xenograft14_EC_24h) %>% .[( (is.na(pY_Batch2_by_cond %>% select(-Xenograft14_EC_24h)) %>% rowSums() ) ==0),]

pY_mat_Batch2_by_cond_noNA <- pY_Batch2_by_cond_noNA %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t
Day 5
sum((is.na(pY_Batch2_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 ) 
## [1] 294
pY_Batch2_by_cond_5d_noNA <- pY_Batch2_by_cond %>%  .[( (is.na(pY_Batch2_by_cond %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d")) ) %>% rowSums() ) ==0 ), ] %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions', contains("_5d") )

pY_mat_Batch2_by_cond_5d_noNA <- pY_Batch2_by_cond_5d_noNA %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t

rm(list=setdiff(ls(), c("pY_mat_Batch2_by_cond_noNA", "pY_Batch2_by_cond_noNA", 
                        "pY_mat_Batch1_by_cond_noNA", "pY_Batch1_by_cond_noNA", 
                        "pY_mat_Batch1_by_cond_5d_noNA", "pY_Batch1_by_cond_5d_noNA",
                        "pY_mat_Batch2_by_cond_5d_noNA", "pY_Batch2_by_cond_5d_noNA")  ))

Load libraries and functions

StringDB

string_db <- STRINGdb::STRINGdb$new( version="11.5", species=9606,
 score_threshold=400, input_directory="../../../General/Code/Data/")

PhosphositePlus Databases

kinase_substrate_dataset <- read_delim("../../../General/Code/Data/Kinase_Substrate_Dataset", delim = "\t", skip = 2) %>% filter(KIN_ORGANISM == "human")
## Rows: 22608 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (14): GENE, KINASE, KIN_ACC_ID, KIN_ORGANISM, SUBSTRATE, SUB_ACC_ID, SUB...
## dbl  (2): SUB_GENE_ID, SITE_GRP_ID
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
kinase_substrate_dataset
## # A tibble: 17,862 × 16
##    GENE    KINASE KIN_ACC_ID KIN_ORGANISM SUBSTRATE  SUB_GENE_ID SUB_ACC_ID
##    <chr>   <chr>  <chr>      <chr>        <chr>            <dbl> <chr>     
##  1 EIF2AK1 HRI    Q9BQI3     human        eIF2-alpha       54318 P68101    
##  2 EIF2AK1 HRI    Q9BQI3     human        eIF2-alpha   100339093 P83268    
##  3 EIF2AK1 HRI    Q9BQI3     human        eIF2-alpha        1965 P05198    
##  4 EIF2AK1 HRI    Q9BQI3     human        eIF2-alpha        1965 P05198    
##  5 PRKCD   PKCD   Q05655     human        syndecan-4       24771 P34901    
##  6 PRKCD   PKCD   Q05655     human        ANKRD54         223690 Q91WK7    
##  7 PRKCD   PKCD   Q05655     human        HDAC5            10014 Q9UQL6    
##  8 PRKCD   PKCD   Q05655     human        PTPRA iso2        5786 P18433-2  
##  9 PRKCD   PKCD   Q05655     human        Bcl-2              596 P10415    
## 10 PRKCD   PKCD   Q05655     human        hnRNP K           3190 P61978    
## # ℹ 17,852 more rows
## # ℹ 9 more variables: SUB_GENE <chr>, SUB_ORGANISM <chr>, SUB_MOD_RSD <chr>,
## #   SITE_GRP_ID <dbl>, `SITE_+/-7_AA` <chr>, DOMAIN <chr>, IN_VIVO_RXN <chr>,
## #   IN_VITRO_RXN <chr>, `CST_CAT#` <chr>
Phosphorylation_site_dataset <- read_delim("../../../General/Code/Data/Phosphorylation_site_dataset", delim = "\t", skip = 2) %>% 
  filter(ORGANISM == "human")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 378662 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): GENE, PROTEIN, ACC_ID, HU_CHR_LOC, MOD_RSD, ORGANISM, DOMAIN, SITE_...
## dbl (7): SITE_GRP_ID, MW_kD, LT_LIT, MS_LIT, MS_CST, CST_CAT#, Ambiguous_Site
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Regulatory_sites_dataset <- read_delim("../../../General/Code/Data/Regulatory_sites", delim = "\t", skip = 2) %>% 
  filter(ORGANISM == "human") %>%
  filter(str_detect( MOD_RSD, "p") )
## New names:
## • `` -> `...21`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 19135 Columns: 21
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (15): GENE, PROTEIN, PROT_TYPE, ACC_ID, HU_CHR_LOC, ORGANISM, MOD_RSD, S...
## dbl  (5): GENE_ID, SITE_GRP_ID, LT_LIT, MS_LIT, MS_CST
## lgl  (1): ...21
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

PTMsigDB

PtmSigdb <- readxl::read_excel("../../../General/Code/Data/db_ptm.sig.db.all.v2.0.0/data_PTMsigDB_all_sites_v2.0.0.xlsx")
## Warning: Expecting numeric in D66110 / R66110C4: got '2407414-sm;d'
## Warning: Expecting numeric in D66117 / R66117C4: got '3755605-sm;d'
## Warning: Expecting numeric in D66118 / R66118C4: got '3829801-sm;d'
## Warning: Expecting numeric in D66251 / R66251C4: got '2134717-sm;u'
## Warning: Expecting numeric in D66252 / R66252C4: got '2134718-sm;u'
## Warning: Expecting numeric in D66508 / R66508C4: got '1668802-me;u'
## Warning: Expecting numeric in D66525 / R66525C4: got '1668801-me;u'
## Warning: Expecting numeric in D66592 / R66592C4: got '2295400-sm;u'
## Warning: Expecting numeric in D66650 / R66650C4: got '60885600-pa;d'
## Warning: Expecting numeric in D66651 / R66651C4: got '60885601-pa;d'
## Warning: Expecting numeric in D66652 / R66652C4: got '8545503-sm;u'
## Warning: Expecting numeric in D66653 / R66653C4: got '8545504-sm;u'
## Warning: Expecting numeric in D66654 / R66654C4: got '8545505-sm;u'
## Warning: Expecting numeric in D66748 / R66748C4: got '31089521-sm;d'
## Warning: Expecting numeric in D66819 / R66819C4: got '1668802-me;d'
## Warning: Expecting numeric in D67030 / R67030C4: got '50455000-sm;u'
## Warning: Expecting numeric in D67031 / R67031C4: got '50455001-sm;u'
## Warning: Expecting numeric in D67032 / R67032C4: got '50455002-sm;u'
## Warning: Expecting numeric in D67043 / R67043C4: got '31094520-me;u'
## Warning: Expecting numeric in D67056 / R67056C4: got '27581511-ne;d'
## Warning: Expecting numeric in D67057 / R67057C4: got '27581513-ne;d'
## Warning: Expecting numeric in D67080 / R67080C4: got '2535901-sm;d'
## Warning: Expecting numeric in D67086 / R67086C4: got '25092200-sm;d'
## Warning: Expecting numeric in D67087 / R67087C4: got '25092201-sm;d'
## Warning: Expecting numeric in D67107 / R67107C4: got '8545502-sm;d'
## Warning: Expecting numeric in D67128 / R67128C4: got '2586358-me;u'
## Warning: Expecting numeric in D67129 / R67129C4: got '2586359-me;u'
## Warning: Expecting numeric in D67130 / R67130C4: got '2586360-me;u'
## Warning: Expecting numeric in D67505 / R67505C4: got '12723517-sm;u'
## Warning: Expecting numeric in D67506 / R67506C4: got '12723518-sm;u'
## Warning: Expecting numeric in D67511 / R67511C4: got '27850501-sm;d'

Combine batch 1 and batch 2

All

all_pY_peptides_form <- rbind( (pY_Batch1_by_cond_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ), 
(pY_Batch2_by_cond_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) ) %>%
  unique


pY_all_xeno_form <- plyr::join_all(list(all_pY_peptides_form, pY_Batch1_by_cond_noNA,
                    pY_Batch2_by_cond_noNA), 
               by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ), 
               type='left') %>% as_tibble() 

sum((is.na(pY_all_xeno_form ) %>% rowSums() ) ==0 ) 
## [1] 111
pY_all_xeno_noNA <- pY_all_xeno_form %>% .[( (is.na(pY_all_xeno_form ) %>% rowSums() ) ==0),]

pY_mat_all_xeno_noNA <- pY_all_xeno_noNA %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t

Day 5

all_pY_peptides_5d_form <- rbind( (pY_Batch1_by_cond_5d_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ), 
(pY_Batch2_by_cond_5d_noNA %>% select('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions') ) ) %>%
  unique


pY_all_xeno_5d_form <- plyr::join_all(list(all_pY_peptides_5d_form, pY_Batch1_by_cond_5d_noNA,
                    pY_Batch2_by_cond_5d_noNA), 
               by=c('Annotated_Sequence', 'HGNC_Symbol', 'Master.Protein.Accessions', 'Master.Protein.Descriptions' ), 
               type='left') %>% as_tibble() 

sum((is.na(pY_all_xeno_5d_form ) %>% rowSums() ) ==0 ) 
## [1] 178
pY_all_xeno_5d_noNA <- pY_all_xeno_5d_form %>% .[( (is.na(pY_all_xeno_5d_form ) %>% rowSums() ) ==0),]

pY_mat_all_xeno_5d_noNA <- pY_all_xeno_5d_noNA %>% 
  mutate( rname = paste0( HGNC_Symbol, "_", Annotated_Sequence) ) %>%
  column_to_rownames("rname") %>%
  select(all_of( contains("Xenograft") )) %>%
  as.matrix() %>%
  t

Summarised Experiment

pY all

# Assay data
assay_data_pY_all <- 
  pY_all_xeno_noNA %>% 
  mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  as.data.frame() %>% 
  column_to_rownames("coln") %>%
  select( -Annotated_Sequence, -HGNC_Symbol, 
         -Master.Protein.Descriptions, -Master.Protein.Accessions) %>%
  as.matrix()

# RowData
rowData_pY_all <- 
  pY_all_xeno_noNA %>%
  select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Descriptions, Master.Protein.Accessions) %>%
  mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  mutate(ID = paste0(HGNC_Symbol, "_", Annotated_Sequence), 
         name = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  as.data.frame() %>% 
  column_to_rownames("coln")


# ColData
colData_pY_all <- 
  str_split(colnames(assay_data_pY_all #) 
                  ), pattern = "_", simplify = TRUE ) %>%
  as.data.frame()
rownames(colData_pY_all) <-  colnames(assay_data_pY_all)
colnames(colData_pY_all) <- c( "replicate", "condition", "day")
colData_pY_all$label <- colnames(assay_data_pY_all)
colData_pY_all$ID <- colnames(assay_data_pY_all) 

pY_all_se <- SummarizedExperiment(assays = assay_data_pY_all,
                           colData = colData_pY_all,
                           rowData = rowData_pY_all)
validObject(pY_all_se)
## [1] TRUE
pY_all_se
## class: SummarizedExperiment 
## dim: 111 19 
## metadata(0):
## assays(1): ''
## rownames(111): PKP3_ADyDTLSLR PKM_AEGSDVANAVLDGADCIMLSGETAKGDyPLEAVR
##   ... PTPRA_yVNILPYDHSR LHPP_yYKETSGLMLDVGPYMK
## rowData names(6): Annotated_Sequence HGNC_Symbol ... ID name
## colnames(19): Xenograft4_E_24h Xenograft4_E_5d ... Xenograft14_EC_5d
##   Xenograft14_ctrl_5d
## colData names(5): replicate condition day label ID

pY 5 day

# Assay data
assay_data_pY_5d <- 
  pY_all_xeno_5d_noNA %>% 
  mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  as.data.frame() %>% 
  column_to_rownames("coln") %>%
  select( -Annotated_Sequence, -HGNC_Symbol, 
         -Master.Protein.Descriptions, -Master.Protein.Accessions) %>%
  as.matrix()

# RowData
rowData_pY_5d <- 
  pY_all_xeno_5d_noNA %>%
  select(Annotated_Sequence, HGNC_Symbol, Master.Protein.Descriptions, Master.Protein.Accessions) %>%
  mutate(coln = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  mutate(ID = paste0(HGNC_Symbol, "_", Annotated_Sequence), 
         name = paste0(HGNC_Symbol, "_", Annotated_Sequence)) %>%
  as.data.frame() %>% 
  column_to_rownames("coln")


# ColData
colData_pY_5d <- 
  str_split(colnames(assay_data_pY_5d #) 
                  ), pattern = "_", simplify = TRUE ) %>%
  as.data.frame()
rownames(colData_pY_5d) <-  colnames(assay_data_pY_5d)
colnames(colData_pY_5d) <- c( "replicate", "condition", "day")
colData_pY_5d$label <- colnames(assay_data_pY_5d)
colData_pY_5d$ID <- colnames(assay_data_pY_5d) 

pY_5d_se <- SummarizedExperiment(assays = assay_data_pY_5d,
                           colData = colData_pY_5d,
                           rowData = rowData_pY_5d)
validObject(pY_5d_se)
## [1] TRUE
pY_5d_se
## class: SummarizedExperiment 
## dim: 178 11 
## metadata(0):
## assays(1): ''
## rownames(178): ANK3_ADIVQQLLQQGASPNAATTSGyTPLHLSAR PKP3_ADyDTLSLR ...
##   PTPRA_yVNILPYDHSR LHPP_yYKETSGLMLDVGPYMK
## rowData names(6): Annotated_Sequence HGNC_Symbol ... ID name
## colnames(11): Xenograft4_E_5d Xenograft4_EBC_5d ... Xenograft14_EC_5d
##   Xenograft14_ctrl_5d
## colData names(5): replicate condition day label ID

Annotate phosphorylation sites

pY

# TODO How to deal with peptides which have multiple phosphorylations?


Annotate_pY_sites <- function(all_pY_sites){
  if(!all(str_count(all_pY_sites$Annotated_Sequence, "y") == 1)){warning("There are peptides with multiple pY sites")}
  tmp_sequence <- paste0("jjjjjjj", all_pY_sites$Annotated_Sequence,"jjjjjjj" )
  position_pY_phos <- str_locate(tmp_sequence, "y")[,1]
  tmp_sequence <- str_sub(tmp_sequence, (position_pY_phos-7), position_pY_phos + 7)
  tmp_sequence <- paste0( str_to_upper(str_sub(tmp_sequence,1,7 )), 
                                    str_to_lower(str_sub(tmp_sequence,8,8 )),
                                    str_to_upper(str_sub(tmp_sequence,9,15 )))
  tmp_sequence <- str_replace_all(string = tmp_sequence, pattern ="J", replacement = "")
  all_pY_sites$Modified_Sequence <- tmp_sequence
  Phosphorylation_site_dataset_pY <- 
    Phosphorylation_site_dataset %>% filter(GENE %in% all_pY_sites$HGNC_Symbol) %>%
    filter(!str_detect(PROTEIN, "iso")) %>%
    filter( str_detect( `SITE_+/-7_AA`, "y" ) ) %>% 
    mutate(Modified_Sequence = paste0( str_to_upper(str_sub(`SITE_+/-7_AA`,1,7 )), 
                                    str_to_lower(str_sub(`SITE_+/-7_AA`,8,8 )),
                                    str_to_upper(str_sub(`SITE_+/-7_AA`,9,16 ))) ) %>% 
    select(GENE, PROTEIN, HU_CHR_LOC, MOD_RSD, SITE_GRP_ID, DOMAIN, Modified_Sequence) %>%
    unique()
  
  Return_MOD_RSD <- function(HGNC_Symbol, Modified_Sequence){
    selprot <- Phosphorylation_site_dataset_pY %>% filter(GENE == HGNC_Symbol)
    if(length(str_which(selprot$Modified_Sequence, Modified_Sequence))==1){
    selsite <- selprot[ str_which(selprot$Modified_Sequence, Modified_Sequence), ]
    return(selsite$MOD_RSD)}else{return("NA")}
  }
  
  Return_DOMAIN <- function(HGNC_Symbol, Modified_Sequence){
    selprot <- Phosphorylation_site_dataset_pY %>% filter(GENE == HGNC_Symbol)
    if(length(str_which(selprot$Modified_Sequence, Modified_Sequence))==1){
    selsite <- selprot[ str_which(selprot$Modified_Sequence, Modified_Sequence), ]
    return(selsite$DOMAIN)}else{return("NA")}
  }
  
  Return_SITE_GRP_ID <- function(HGNC_Symbol, Modified_Sequence){
    selprot <- Phosphorylation_site_dataset_pY %>% filter(GENE == HGNC_Symbol)
    if(length(str_which(selprot$Modified_Sequence, Modified_Sequence))==1){
    selsite <- selprot[ str_which(selprot$Modified_Sequence, Modified_Sequence), ]
    return(selsite$SITE_GRP_ID)}else{return("NA")}
  }
  
  all_pY_sites$DOMAIN <- apply(all_pY_sites,1, function(x){ Return_DOMAIN(x[1], x[3] )}  )
  all_pY_sites$MOD_RSD <- apply(all_pY_sites,1, function(x){ Return_MOD_RSD(x[1], x[3] )}  )
  all_pY_sites$SITE_GRP_ID <- as.double( apply(all_pY_sites,1, function(x){ Return_SITE_GRP_ID(x[1], x[3] )}  ) )
  
  return(all_pY_sites)
  #all_pY_sites <- left_join(all_pY_sites,
  #          kinase_substrate_dataset %>% select(GENE, KINASE, SUBSTRATE, SUB_GENE, SUB_MOD_RSD, SITE_GRP_ID),
  #          by= "SITE_GRP_ID")
  #
  #all_pY_sites <- left_join(all_pY_sites, 
  #          Regulatory_sites_dataset %>% filter(GENE %in% unique(all_pY_sites$HGNC_Symbol), str_detect(MOD_RSD, "Y") ) , 
  #          by = c( "HGNC_Symbol" = "GENE", "MOD_RSD" )) 
}

all_pY_sites <- bind_rows(
  pY_all_xeno_noNA %>% select(HGNC_Symbol, Annotated_Sequence),
  pY_all_xeno_5d_noNA %>% select(HGNC_Symbol, Annotated_Sequence)) %>% unique

all_pY_sites <- Annotate_pY_sites(all_pY_sites)
## Warning in Annotate_pY_sites(all_pY_sites): NAs introduced by coercion

Quality control

Nr. phospho sites

print( paste( sum((is.na(pY_Batch1_by_cond_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the sets combined and the values are summed accross conditions" ))
## [1] "231 pY peptides passed the filtering procedure for the sets combined and the values are summed accross conditions"
print( paste( sum((is.na(pY_Batch2_by_cond_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the sets combined when Xenograft14_EC_24h is filtered out and the values are summed accross conditions" ))
## [1] "294 pY peptides passed the filtering procedure for the sets combined when Xenograft14_EC_24h is filtered out and the values are summed accross conditions"
print( paste( sum((is.na(pY_all_xeno_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the batches combined" ))
## [1] "111 pY peptides passed the filtering procedure for the batches combined"
print( paste( sum((is.na(pY_all_xeno_5d_noNA) %>% rowSums() ) ==0 ) , "pY peptides passed the filtering procedure for the batches combined" ))
## [1] "178 pY peptides passed the filtering procedure for the batches combined"

Principle component analysis

Phosphotyrosines all

pY_mat_all_xeno_noNA %>%
  prcomp() %>% 
  summary() %>% 
  .$importance %>% t() %>% as.data.frame() %>%
  rownames_to_column("PC") %>% 
  as_tibble() %>%
  mutate(PC = as.factor(PC)) %>%
  mutate(PC = factor(PC, levels = PC)) %>%
  ggplot(aes(PC, `Proportion of Variance` )) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))

PCA_pY <- pY_mat_all_xeno_noNA %>%
  prcomp() %>% 
  .$x %>% 
  as.data.frame() %>%
  rownames_to_column("sample")

PCApY_rot <- pY_mat_all_xeno_noNA %>%
  prcomp() %>%
  .$rotation
PCApY_rot <- bind_cols(PCApY_rot, 
  (pY_all_xeno_noNA %>% select(HGNC_Symbol, Annotated_Sequence) ) ) %>% arrange(desc(PC1) )

PCA_pY <- PCA_pY %>% 
  mutate(sample= str_remove(sample,"Xenograft")) %>%
  separate( sample , into = c("xenograft", "treatment", "day"), sep = "_")

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = treatment, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = treatment, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = day, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = day, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = xenograft, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = xenograft, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

Phosphotyrosines day 5

pY_mat_all_xeno_noNA[str_detect( rownames(pY_mat_all_xeno_noNA), "5d") | str_detect( rownames(pY_mat_all_xeno_noNA), "ctrl"),] %>%
  prcomp() %>% 
  summary() %>% 
  .$importance %>% t() %>% as.data.frame() %>%
  rownames_to_column("PC") %>% 
  as_tibble() %>%
  mutate(PC = as.factor(PC)) %>%
  mutate(PC = factor(PC, levels = PC)) %>%
  ggplot(aes(PC, `Proportion of Variance` )) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))

PCA_pY <- pY_mat_all_xeno_noNA[str_detect( rownames(pY_mat_all_xeno_noNA), "5d") | str_detect( rownames(pY_mat_all_xeno_noNA), "ctrl"),] %>%
  prcomp() %>% 
  .$x %>% 
  as.data.frame() %>%
  rownames_to_column("sample")

PCApY_rot <- pY_mat_all_xeno_noNA[str_detect( rownames(pY_mat_all_xeno_noNA), "5d") | str_detect( rownames(pY_mat_all_xeno_noNA), "ctrl"),] %>%
  prcomp() %>%
  .$rotation
PCApY_rot <- bind_cols(PCApY_rot, 
  (pY_all_xeno_noNA %>% select(HGNC_Symbol, Annotated_Sequence) ) ) %>% arrange(desc(PC1) )

PCA_pY <- PCA_pY %>% 
  mutate(sample= str_remove(sample,"Xenograft")) %>%
  separate( sample , into = c("xenograft", "treatment", "day"), sep = "_")

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = treatment, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = treatment, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = day, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = day, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = xenograft, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = xenograft, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

Phosphotyrosines 5 day dataframe

pY_mat_all_xeno_5d_noNA %>%
  prcomp() %>% 
  summary() %>% 
  .$importance %>% t() %>% as.data.frame() %>%
  rownames_to_column("PC") %>% 
  as_tibble() %>%
  mutate(PC = as.factor(PC)) %>%
  mutate(PC = factor(PC, levels = PC)) %>%
  ggplot(aes(PC, `Proportion of Variance` )) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))

PCA_pY <- pY_mat_all_xeno_5d_noNA %>%
  prcomp() %>% 
  .$x %>% 
  as.data.frame() %>%
  rownames_to_column("sample")

PCApY_rot <- pY_mat_all_xeno_5d_noNA %>%
  prcomp() %>%
  .$rotation
PCApY_rot <- bind_cols(PCApY_rot, 
  (pY_all_xeno_5d_noNA %>% select(HGNC_Symbol, Annotated_Sequence) ) ) %>% arrange(desc(PC1) )

PCA_pY <- PCA_pY %>% 
  mutate(sample= str_remove(sample,"Xenograft")) %>%
  separate( sample , into = c("xenograft", "treatment", "day"), sep = "_")

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = treatment, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = treatment, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = day, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = day, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

PCA12_pY <- PCA_pY %>% 
  ggplot(aes(PC1, PC2, color = xenograft, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

PCA34_pY <- PCA_pY %>% 
  ggplot(aes(PC3, PC4, color = xenograft, shape = day )) +
  geom_point(size = 3) +
  theme_classic() +
  scale_colour_manual(values=PGPalette[c(5,1,4,2)]) +
  geom_text(aes(label = xenograft), color = "black", nudge_x = 1) +
  ggtitle("pY")

ggpubr::ggarrange(PCA12_pY, PCA34_pY)

Analysis

DEP

Tyrosine all

Each condition vs ctrl

data_diff_E_vs_ctrl_pY <- test_diff(pY_all_se, type="manual", test = "E_vs_ctrl")
## Tested contrasts: E_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_E_vs_ctrl_pY <- add_rejections_SH(data_diff_E_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_E_vs_ctrl_pY, contrast = "E_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY") 
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## Loading required namespace: reactome.db
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)

## Note: Row-scaling applied for this heatmap

data_diff_EC_vs_ctrl_pY <- test_diff(pY_all_se, type="manual", test = "EC_vs_ctrl")
## Tested contrasts: EC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_ctrl_pY <- add_rejections_SH(data_diff_EC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_ctrl_pY, contrast = "EC_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY") 
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns

## character(0)
Plot_Enrichment_Single_Pathway(dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", 
                               pw = "Epigenetic regulation of gene expression")
data_diff_EBC_vs_ctrl_pY <- test_diff(pY_all_se, type="manual", test = "EBC_vs_ctrl")
## Tested contrasts: EBC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_ctrl_pY <- add_rejections_SH(data_diff_EBC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_ctrl_pY, contrast = "EBC_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_ctrl_pY, comparison = "EBC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Warning in max(screen_pval05_pos[, logFcColStr]): no non-missing arguments to
## max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(cs1s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs1s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs2s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs2s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs3s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs3s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Note: Row-scaling applied for this heatmap

EC vs E

data_diff_EC_vs_E_pY <- test_diff(pY_all_se, type = "manual", 
                              test = c("EC_vs_E"))
## Tested contrasts: EC_vs_E
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_E_pY <- add_rejections_SH(data_diff_EC_vs_E_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_E_pY, contrast = "EC_vs_E",  add_names = TRUE, additional_title = "pY", proteins_of_interest = "EGFR")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_E_pY, comparison = "EC_vs_E_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## [1] "Signal attenuation"                  "Cytokine Signaling in Immune system"
## [3] "RAF-independent MAPK1/3 activation"

## Note: Row-scaling applied for this heatmap

#data_results <- get_df_long(dep)

EBC vs EC

data_diff_EBC_vs_EC_pY <- test_diff(pY_all_se, type = "manual", 
                              test = c("EBC_vs_EC"))
## Tested contrasts: EBC_vs_EC
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_EC_pY <- add_rejections_SH(data_diff_EBC_vs_EC_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_EC_pY, contrast = "EBC_vs_EC",  add_names = TRUE, additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_EC_pY, comparison = "EBC_vs_EC_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns

## character(0)
#data_results <- get_df_long(dep)

Tyrosine day 5

Each condition vs ctrl

data_diff_E_vs_ctrl_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type="manual", test = "E_vs_ctrl")
## Tested contrasts: E_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_E_vs_ctrl_pY <- add_rejections_SH(data_diff_E_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_E_vs_ctrl_pY, contrast = "E_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY") 
rowData(dep_E_vs_ctrl_pY) %>% as_tibble() %>% select(HGNC_Symbol, E_vs_ctrl_diff) %>% write.table("../Data/Kinase_enrichment/AllXenograftsCombined_E_vs_ctrl_pY_forstring.txt", quote = F, row.names = F, col.names = F)
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns

## character(0)
GSEA_E_vs_ctrl <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
GSEA_E_vs_ctrl %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 0 × 8
## # ℹ 8 variables: pathway <chr>, pval <dbl>, padj <dbl>, log2err <dbl>,
## #   ES <dbl>, NES <dbl>, size <int>, leadingEdge <list>
GSEA_E_vs_ctrl_PTM <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T)
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"
GSEA_E_vs_ctrl_PTM %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 1 × 8
##   pathway                   pval   padj log2err    ES   NES  size leadingEdge
##   <chr>                    <dbl>  <dbl>   <dbl> <dbl> <dbl> <int> <list>     
## 1 PATH-NP_EGFR1_PATHWAY 0.000104 0.0284   0.538 0.636  1.92    41 <chr [18]>
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_EGFR1_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"            "PERT-PSP_HGF"                    
## [3] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
## Joining with `by = join_by(SITE_GRP_ID)`

## # A tibble: 47 × 4
##    HGNC_Symbol Annotated_Sequence  MOD_RSD    FC
##    <chr>       <chr>               <chr>   <dbl>
##  1 CDK1        IGEGTYGVVyKGR       Y19-p   1.41 
##  2 CTNND1      HYEDGYPGGSDNyGSLSR  Y228-p  1.24 
##  3 PTPN11      VyENVGLMQQQK        Y580-p  1.04 
##  4 PXN         VGEEEHVySFPNK       Y118-p  1.01 
##  5 PKP4        STTNyVDFYSTK        Y1168-p 0.998
##  6 MAPK3       IADPEHDHTGFLTEyVATR Y204-p  0.965
##  7 MAPK3       IADPEHDHTGFLtEyVATR Y204-p  0.965
##  8 PRKCD       RSDSASSEPVGIyQGFEK  Y313-p  0.904
##  9 PRKCD       SDSASSEPVGIyQGFEK   Y313-p  0.904
## 10 PRKCD       SDSASSEPVGIyQGFEKK  Y313-p  0.904
## # ℹ 37 more rows
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_KIT_RECEPTOR_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"
## Joining with `by = join_by(SITE_GRP_ID)`

## # A tibble: 4 × 4
##   HGNC_Symbol Annotated_Sequence  MOD_RSD    FC
##   <chr>       <chr>               <chr>   <dbl>
## 1 MAPK3       IADPEHDHTGFLTEyVATR Y204-p  0.965
## 2 MAPK3       IADPEHDHTGFLtEyVATR Y204-p  0.965
## 3 MAPK1       VADPDHDHTGFLTEyVATR Y187-p  0.872
## 4 MAPK1       VADPDHDHTGFLtEyVATR Y187-p  0.872
data_diff_EC_vs_ctrl_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type="manual", test = "EC_vs_ctrl")
## Tested contrasts: EC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_ctrl_pY <- add_rejections_SH(data_diff_EC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_ctrl_pY, contrast = "EC_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY") 
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)

## Note: Row-scaling applied for this heatmap

Plot_Enrichment_Single_Pathway(dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", 
                               pw = "Epigenetic regulation of gene expression")
data_diff_EBC_vs_ctrl_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type="manual", test = "EBC_vs_ctrl")
## Tested contrasts: EBC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_ctrl_pY <- add_rejections_SH(data_diff_EBC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_ctrl_pY, contrast = "EBC_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_ctrl_pY, comparison = "EBC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
## Warning in max(screen_pval05_pos[, logFcColStr]): no non-missing arguments to
## max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(cs1s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs1s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs2s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs2s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in min(cs3s, na.rm = TRUE): no non-missing arguments to min; returning
## Inf
## Warning in max(cs3s, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Note: Row-scaling applied for this heatmap

EC vs E

data_diff_EC_vs_E_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type = "manual", 
                              test = c("EC_vs_E"))
## Tested contrasts: EC_vs_E
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_E_pY <- add_rejections_SH(data_diff_EC_vs_E_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_E_pY, contrast = "EC_vs_E",  add_names = TRUE, additional_title = "pY", proteins_of_interest = "EGFR")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EC_vs_E_pY, comparison = "EC_vs_E_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)

## Note: Row-scaling applied for this heatmap

#data_results <- get_df_long(dep)

EBC vs EC

data_diff_EBC_vs_EC_pY <- test_diff(pY_all_se[,pY_all_se$day == "5d"], type = "manual", 
                              test = c("EBC_vs_EC"))
## Tested contrasts: EBC_vs_EC
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_EC_pY <- add_rejections_SH(data_diff_EBC_vs_EC_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_EC_pY, contrast = "EBC_vs_EC",  add_names = TRUE, additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_form, dep_EBC_vs_EC_pY, comparison = "EBC_vs_EC_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns

## character(0)
#data_results <- get_df_long(dep)

Tyrosine 5 day dataframe

Each condition vs ctrl

data_diff_E_vs_ctrl_pY <- test_diff(pY_5d_se, type="manual", test = "E_vs_ctrl")
## Tested contrasts: E_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_E_vs_ctrl_pY <- add_rejections_SH(data_diff_E_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_E_vs_ctrl_pY, contrast = "E_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY") 
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns

## [1] "Signaling by Receptor Tyrosine Kinases"                                
## [2] "RHO GTPases Activate NADPH Oxidases"                                   
## [3] "GAB1 signalosome"                                                      
## [4] "Estrogen-dependent nuclear events downstream of ESR-membrane signaling"
## [5] "Golgi Cisternae Pericentriolar Stack Reorganization"                   
## [6] "Negative regulation of FGFR1 signaling"
GSEA_E_vs_ctrl <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## [1] "Signaling by Receptor Tyrosine Kinases"                                
## [2] "RHO GTPases Activate NADPH Oxidases"                                   
## [3] "GAB1 signalosome"                                                      
## [4] "Estrogen-dependent nuclear events downstream of ESR-membrane signaling"
## [5] "Golgi Cisternae Pericentriolar Stack Reorganization"                   
## [6] "Negative regulation of FGFR1 signaling"
GSEA_E_vs_ctrl %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 13 × 8
##    pathway                     pval   padj log2err    ES   NES  size leadingEdge
##    <chr>                      <dbl>  <dbl>   <dbl> <dbl> <dbl> <int> <list>     
##  1 Signaling by Receptor T… 6.86e-5 0.0221   0.538 0.643  1.93    32 <chr [15]> 
##  2 Signal Transduction      1.51e-4 0.0221   0.519 0.579  1.84    59 <chr [19]> 
##  3 G alpha (q) signalling … 5.02e-4 0.0394   0.477 0.815  1.81     7 <chr [5]>  
##  4 RHO GTPases Activate NA… 1.81e-4 0.0221   0.519 0.946  1.79     4 <chr [3]>  
##  5 RAF-independent MAPK1/3… 6.67e-4 0.0483   0.477 0.889  1.79     5 <chr [4]>  
##  6 GAB1 signalosome         3.96e-4 0.0373   0.498 0.934  1.77     4 <chr [3]>  
##  7 Estrogen-dependent nucl… 4.78e-4 0.0394   0.498 0.931  1.76     4 <chr [4]>  
##  8 Golgi Cisternae Pericen… 2.11e-4 0.0221   0.519 0.969  1.69     3 <chr [3]>  
##  9 Negative regulation of … 2.11e-4 0.0221   0.519 0.969  1.69     3 <chr [3]>  
## 10 Negative regulation of … 2.11e-4 0.0221   0.519 0.969  1.69     3 <chr [3]>  
## 11 Negative regulation of … 2.11e-4 0.0221   0.519 0.969  1.69     3 <chr [3]>  
## 12 Negative regulation of … 2.11e-4 0.0221   0.519 0.969  1.69     3 <chr [3]>  
## 13 Spry regulation of FGF … 2.11e-4 0.0221   0.519 0.969  1.69     3 <chr [3]>
GSEA_E_vs_ctrl_PTM <- Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T)
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PERT-PSP_VANADATE"                "PATH-NP_EGFR1_PATHWAY"           
## [3] "PERT-PSP_IL_2"                    "PERT-PSP_HGF"                    
## [5] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
GSEA_E_vs_ctrl_PTM %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 14 × 8
##    pathway                    pval    padj log2err    ES   NES  size leadingEdge
##    <chr>                     <dbl>   <dbl>   <dbl> <dbl> <dbl> <int> <list>     
##  1 PERT-PSP_PHORBOL_ESTER  4.78e-5 0.00651   0.557 0.932  1.86     5 <chr [4]>  
##  2 PERT-PSP_VANADATE       5.50e-5 0.00651   0.557 0.930  1.85     5 <chr [4]>  
##  3 PATH-NP_EGFR1_PATHWAY   1.15e-4 0.00811   0.538 0.571  1.85    53 <chr [19]> 
##  4 PERT-PSP_THROMBIN       1.36e-4 0.00811   0.519 0.879  1.83     6 <chr [5]>  
##  5 PATH-NP_KIT_RECEPTOR_P… 6.55e-5 0.00651   0.538 0.957  1.80     4 <chr [4]>  
##  6 PERT-PSP_ANTI_CD3       1.60e-3 0.0341    0.455 0.752  1.76     9 <chr [5]>  
##  7 PERT-PSP_ERLOTINIB      1.36e-3 0.0313    0.455 0.894  1.68     4 <chr [4]>  
##  8 PERT-PSP_IL_2           1.36e-3 0.0313    0.455 0.894  1.68     4 <chr [4]>  
##  9 PERT-PSP_HGF            8.85e-4 0.0313    0.477 0.957  1.67     3 <chr [3]>  
## 10 PERT-PSP_PAR1_ACTIVATI… 8.85e-4 0.0313    0.477 0.957  1.67     3 <chr [3]>  
## 11 PERT-PSP_SPHINGOSINE_1… 8.85e-4 0.0313    0.477 0.957  1.67     3 <chr [3]>  
## 12 PERT-PSP_CYCLIC_STRETCH 1.27e-3 0.0313    0.455 0.951  1.66     3 <chr [3]>  
## 13 PERT-PSP_FIBRONECTIN    1.27e-3 0.0313    0.455 0.951  1.66     3 <chr [3]>  
## 14 PERT-PSP_RGD            1.27e-3 0.0313    0.455 0.951  1.66     3 <chr [3]>
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_EGFR1_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"            "PERT-PSP_ERLOTINIB"              
## [3] "PERT-PSP_IL_2"                    "PERT-PSP_HGF"                    
## [5] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
## Joining with `by = join_by(SITE_GRP_ID)`

## # A tibble: 59 × 4
##    HGNC_Symbol Annotated_Sequence  MOD_RSD    FC
##    <chr>       <chr>               <chr>   <dbl>
##  1 CDK1        IGEGTYGVVyKGR       Y19-p   1.41 
##  2 CTNND1      HYEDGYPGGSDNyGSLSR  Y228-p  1.24 
##  3 PRKCD       RSDSASSEPVGIyQGFEK  Y313-p  1.22 
##  4 PRKCD       SDSASSEPVGIyQGFEK   Y313-p  1.22 
##  5 PRKCD       SDSASSEPVGIyQGFEKK  Y313-p  1.22 
##  6 PTPN11      VyENVGLMQQQK        Y580-p  1.04 
##  7 PXN         VGEEEHVySFPNK       Y118-p  1.01 
##  8 PKP4        STTNyVDFYSTK        Y1168-p 0.998
##  9 MAPK3       IADPEHDHTGFLTEyVATR Y204-p  0.965
## 10 MAPK3       IADPEHDHTGFLtEyVATR Y204-p  0.965
## # ℹ 49 more rows
Run_GSEA(DEP_result = dep_E_vs_ctrl_pY, comparison = "E_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_KIT_RECEPTOR_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## [1] "PATH-NP_EGFR1_PATHWAY"            "PERT-PSP_HGF"                    
## [3] "PERT-PSP_PAR1_ACTIVATING_PEPTIDE"
## Joining with `by = join_by(SITE_GRP_ID)`

## # A tibble: 6 × 4
##   HGNC_Symbol Annotated_Sequence           MOD_RSD    FC
##   <chr>       <chr>                        <chr>   <dbl>
## 1 MAPK3       IADPEHDHTGFLTEyVATR          Y204-p  0.965
## 2 MAPK3       IADPEHDHTGFLtEyVATR          Y204-p  0.965
## 3 EGFR        GSHQISLDNPDyQQDFFPK          Y1172-p 0.930
## 4 MAPK1       VADPDHDHTGFLTEyVATR          Y187-p  0.872
## 5 MAPK1       VADPDHDHTGFLtEyVATR          Y187-p  0.872
## 6 PTK2        THAVSVSETDDyAEIIDEEDTYTMPSTR Y397-p  0.868
data_diff_EC_vs_ctrl_pY <- test_diff(pY_5d_se, type="manual", test = "EC_vs_ctrl")
## Tested contrasts: EC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_ctrl_pY <- add_rejections_SH(data_diff_EC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_ctrl_pY, contrast = "EC_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY") 
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)

## Note: Row-scaling applied for this heatmap

GSEA_EC_vs_ctrl <- Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T)
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)
GSEA_EC_vs_ctrl %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 0 × 8
## # ℹ 8 variables: pathway <chr>, pval <dbl>, padj <dbl>, log2err <dbl>,
## #   ES <dbl>, NES <dbl>, size <int>, leadingEdge <list>
GSEA_EC_vs_ctrl_PTM <- Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T)
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## character(0)
GSEA_EC_vs_ctrl_PTM %>% as_tibble() %>% filter(padj < 0.05) %>% arrange(desc(NES))
## # A tibble: 0 × 8
## # ℹ 8 variables: pathway <chr>, pval <dbl>, padj <dbl>, log2err <dbl>,
## #   ES <dbl>, NES <dbl>, size <int>, leadingEdge <list>
Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_EGFR1_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## character(0)
## Joining with `by = join_by(SITE_GRP_ID)`

## # A tibble: 59 × 4
##    HGNC_Symbol Annotated_Sequence              MOD_RSD    FC
##    <chr>       <chr>                           <chr>   <dbl>
##  1 PXN         VGEEEHVySFPNK                   Y118-p  0.617
##  2 LYN         EKAEERPTFDYLQSVLDDFYTATEGQyQQQP Y508-p  0.387
##  3 LYN         AEERPTFDYLQSVLDDFYTATEGQyQQQP   Y508-p  0.387
##  4 PKP3        ADyDTLSLR                       Y176-p  0.361
##  5 CTNND1      HYEDGYPGGSDNyGSLSR              Y228-p  0.345
##  6 PKP4        STTNyVDFYSTK                    Y1168-p 0.287
##  7 ESYT1       HLSPyATLTVGDSSHK                Y822-p  0.268
##  8 EGFR        GSHQISLDNPDyQQDFFPK             Y1172-p 0.260
##  9 PTPRA       VVQEYIDAFSDyANFK                Y798-p  0.215
## 10 INPPL1      NSFNNPAyYVLEGVPHQLLPPEPPSPAR    Y986-p  0.191
## # ℹ 49 more rows
Run_GSEA(DEP_result = dep_EC_vs_ctrl_pY, comparison = "EC_vs_ctrl_diff", return_df = T,
         ptmGSEA_site_df = all_pY_sites, PtmSigdb = PtmSigdb, ptmGSEA = T, single_pathway = "PATH-NP_KIT_RECEPTOR_PATHWAY")
## Joining with `by = join_by(HGNC_Symbol, Annotated_Sequence)`
## character(0)
## Joining with `by = join_by(SITE_GRP_ID)`

## # A tibble: 6 × 4
##   HGNC_Symbol Annotated_Sequence           MOD_RSD      FC
##   <chr>       <chr>                        <chr>     <dbl>
## 1 PTK2        THAVSVSETDDyAEIIDEEDTYTMPSTR Y397-p   0.384 
## 2 EGFR        GSHQISLDNPDyQQDFFPK          Y1172-p  0.260 
## 3 MAPK1       VADPDHDHTGFLTEyVATR          Y187-p   0.0162
## 4 MAPK1       VADPDHDHTGFLtEyVATR          Y187-p   0.0162
## 5 MAPK3       IADPEHDHTGFLTEyVATR          Y204-p  -0.233 
## 6 MAPK3       IADPEHDHTGFLtEyVATR          Y204-p  -0.233
data_diff_EBC_vs_ctrl_pY <- test_diff(pY_5d_se, type="manual", test = "EBC_vs_ctrl")
## Tested contrasts: EBC_vs_ctrl
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_ctrl_pY <- add_rejections_SH(data_diff_EBC_vs_ctrl_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_ctrl_pY, contrast = "EBC_vs_ctrl", 
                 add_names = TRUE,
                additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EBC_vs_ctrl_pY, comparison = "EBC_vs_ctrl_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## character(0)

## Note: Row-scaling applied for this heatmap

EC vs E

data_diff_EC_vs_E_pY <- test_diff(pY_5d_se, type = "manual", 
                              test = c("EC_vs_E"))
## Tested contrasts: EC_vs_E
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EC_vs_E_pY <- add_rejections_SH(data_diff_EC_vs_E_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EC_vs_E_pY, contrast = "EC_vs_E",  add_names = TRUE, additional_title = "pY", proteins_of_interest = "EGFR")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EC_vs_E_pY, comparison = "EC_vs_E_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## Warning in fgseaMultilevel(pathways = pathways, stats = stats, minSize =
## minSize, : There were 1 pathways for which P-values were not calculated
## properly due to unbalanced (positive and negative) gene-level statistic values.
## For such pathways pval, padj, NES, log2err are set to NA. You can try to
## increase the value of the argument nPermSimple (for example set it nPermSimple
## = 10000)
## [1] "Golgi Cisternae Pericentriolar Stack Reorganization"
## [2] "Signal Transduction"                                
## [3] "Cytokine Signaling in Immune system"                
## [4] "RHO GTPases Activate NADPH Oxidases"                
## [5] "Insulin receptor signalling cascade"                
## [6] "PIP3 activates AKT signaling"

## Note: Row-scaling applied for this heatmap

#data_results <- get_df_long(dep)

EBC vs EC

data_diff_EBC_vs_EC_pY <- test_diff(pY_5d_se, type = "manual", 
                              test = c("EBC_vs_EC"))
## Tested contrasts: EBC_vs_EC
## Warning in fdrtool::fdrtool(res$t, plot = FALSE, verbose = FALSE): There may be
## too few input test statistics for reliable FDR calculations!
dep_EBC_vs_EC_pY <- add_rejections_SH(data_diff_EBC_vs_EC_pY, alpha = 0.05, lfc = log2(1.2))
GGPlotly_Volcano(dep_EBC_vs_EC_pY, contrast = "EBC_vs_EC",  add_names = TRUE, additional_title = "pY")
Return_DEP_Hits_Plots(data = pY_all_xeno_5d_form, dep_EBC_vs_EC_pY, comparison = "EBC_vs_EC_diff")
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns

## character(0)
#data_results <- get_df_long(dep)

Session Info

sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] lubridate_1.9.2             forcats_1.0.0              
##  [3] stringr_1.5.0               dplyr_1.1.2                
##  [5] purrr_1.0.2                 readr_2.1.4                
##  [7] tidyr_1.3.0                 tibble_3.2.1               
##  [9] ggplot2_3.4.2               tidyverse_2.0.0            
## [11] mdatools_0.14.0             SummarizedExperiment_1.28.0
## [13] GenomicRanges_1.50.2        GenomeInfoDb_1.34.9        
## [15] MatrixGenerics_1.10.0       matrixStats_1.0.0          
## [17] DEP_1.20.0                  org.Hs.eg.db_3.16.0        
## [19] AnnotationDbi_1.60.2        IRanges_2.32.0             
## [21] S4Vectors_0.36.2            Biobase_2.58.0             
## [23] BiocGenerics_0.44.0         fgsea_1.24.0               
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.3             shinydashboard_0.7.2   proto_1.0.0           
##   [4] gmm_1.8                tidyselect_1.2.0       RSQLite_2.3.1         
##   [7] htmlwidgets_1.6.2      grid_4.2.3             BiocParallel_1.32.6   
##  [10] norm_1.0-11.1          munsell_0.5.0          codetools_0.2-19      
##  [13] preprocessCore_1.60.2  chron_2.3-61           DT_0.28               
##  [16] withr_2.5.0            colorspace_2.1-0       highr_0.10            
##  [19] knitr_1.43             rstudioapi_0.15.0      ggsignif_0.6.4        
##  [22] mzID_1.36.0            labeling_0.4.2         GenomeInfoDbData_1.2.9
##  [25] bit64_4.0.5            farver_2.1.1           pheatmap_1.0.12       
##  [28] vctrs_0.6.3            generics_0.1.3         xfun_0.40             
##  [31] timechange_0.2.0       R6_2.5.1               doParallel_1.0.17     
##  [34] clue_0.3-64            MsCoreUtils_1.10.0     bitops_1.0-7          
##  [37] cachem_1.0.8           DelayedArray_0.24.0    assertthat_0.2.1      
##  [40] promises_1.2.1         scales_1.2.1           vroom_1.6.3           
##  [43] gtable_0.3.3           affy_1.76.0            sandwich_3.0-2        
##  [46] rlang_1.1.1            mzR_2.32.0             GlobalOptions_0.1.2   
##  [49] rstatix_0.7.2          lazyeval_0.2.2         impute_1.72.3         
##  [52] broom_1.0.5            BiocManager_1.30.22    yaml_2.3.7            
##  [55] abind_1.4-5            crosstalk_1.2.0        backports_1.4.1       
##  [58] httpuv_1.6.11          tools_4.2.3            affyio_1.68.0         
##  [61] ellipsis_0.3.2         gplots_3.1.3           jquerylib_0.1.4       
##  [64] RColorBrewer_1.1-3     STRINGdb_2.10.1        MSnbase_2.24.2        
##  [67] gsubfn_0.7             Rcpp_1.0.11            hash_2.2.6.2          
##  [70] plyr_1.8.8             zlibbioc_1.44.0        RCurl_1.98-1.12       
##  [73] ggpubr_0.6.0           sqldf_0.4-11           GetoptLong_1.0.5      
##  [76] cowplot_1.1.1          zoo_1.8-12             cluster_2.1.4         
##  [79] magrittr_2.0.3         data.table_1.14.8      circlize_0.4.15       
##  [82] reactome.db_1.82.0     pcaMethods_1.90.0      mvtnorm_1.2-2         
##  [85] ProtGenerics_1.30.0    hms_1.1.3              mime_0.12             
##  [88] evaluate_0.21          xtable_1.8-4           XML_3.99-0.14         
##  [91] readxl_1.4.3           shape_1.4.6            compiler_4.2.3        
##  [94] KernSmooth_2.23-22     ncdf4_1.21             crayon_1.5.2          
##  [97] htmltools_0.5.6        later_1.3.1            tzdb_0.4.0            
## [100] DBI_1.1.3              ComplexHeatmap_2.14.0  MASS_7.3-60           
## [103] tmvtnorm_1.5           Matrix_1.6-0           car_3.1-2             
## [106] cli_3.6.1              vsn_3.66.0             imputeLCMD_2.1        
## [109] parallel_4.2.3         igraph_1.5.1           pkgconfig_2.0.3       
## [112] plotly_4.10.2          MALDIquant_1.22.1      foreach_1.5.2         
## [115] bslib_0.5.0            XVector_0.38.0         digest_0.6.33         
## [118] Biostrings_2.66.0      rmarkdown_2.23         cellranger_1.1.0      
## [121] fastmatch_1.1-3        shiny_1.7.4.1          gtools_3.9.4          
## [124] rjson_0.2.21           lifecycle_1.0.3        jsonlite_1.8.7        
## [127] carData_3.0-5          viridisLite_0.4.2      limma_3.54.2          
## [130] fansi_1.0.4            pillar_1.9.0           lattice_0.21-8        
## [133] KEGGREST_1.38.0        fastmap_1.1.1          httr_1.4.6            
## [136] plotrix_3.8-2          glue_1.6.2             fdrtool_1.2.17        
## [139] png_0.1-8              iterators_1.0.14       bit_4.0.5             
## [142] stringi_1.7.12         sass_0.4.7             blob_1.2.4            
## [145] caTools_1.18.2         memoise_2.0.1
knitr::knit_exit()